Read data
source(paste0(root.dir, '../step2/scripts/myRLib.R'))
intermediate.files <- strsplit('../step2/output-direction_XT_YI/fastInsulin__wtccc_t2d__wtccc_ctrl/covar_LDpred_p1.0000e-03.txt:../step2/output-direction_XT_YI/fastInsulin__wtccc_t2d__wtccc_ctrl/covar_LDpred_p3.0000e-04.txt:../step2/output-direction_XT_YI/fastInsulin__wtccc_t2d__wtccc_ctrl/covar_LDpred_p1.0000e-04.txt', ':')[[1]]
ps <- strsplit('0.001,0.0003,0.0001', ',')[[1]]
expression.files <- strsplit('output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Artery_Coronary_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/DGN-HapMap-2015_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Whole_Blood_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Cells_EBV-transformed_lymphocytes_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Thyroid_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Stomach_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Small_Intestine_Terminal_Ileum_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Esophagus_Muscularis_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Spleen_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Pancreas_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Esophagus_Gastroesophageal_Junction_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Adrenal_Gland_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Adipose_Visceral_Omentum_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Adipose_Subcutaneous_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Pituitary_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Colon_Transverse_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Esophagus_Mucosa_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Liver_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Lung_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Muscle_Skeletal_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Cells_Transformed_fibroblasts_predicted_expression.txt', ':')[[1]]
ts <- strsplit('Artery_Coronary,DGN-HapMap-2015,Whole_Blood,Cells_EBV-transformed_lymphocytes,Thyroid,Stomach,Small_Intestine_Terminal_Ileum,Esophagus_Muscularis,Spleen,Pancreas,Esophagus_Gastroesophageal_Junction,Adrenal_Gland,Adipose_Visceral_Omentum,Adipose_Subcutaneous,Pituitary,Colon_Transverse,Esophagus_Mucosa,Liver,Lung,Muscle_Skeletal,Cells_Transformed_fibroblasts', ',')[[1]]
gene <- read.table(getFile(root.dir, 'output-prepare_gene_id/fastInsulin__wtccc_t2d__wtccc_ctrl.genelist'), header = F, sep = '\t')
colnames(gene) <- c('gene.name', 'gencode.id')
yi <- read.table(getFile(root.dir, '../step2/output-direction_XT_YI/fastInsulin__wtccc_t2d__wtccc_ctrl/yi.logistic.assoc'), header = T)
cols <- colnames(yi)
yi.filenames <- apply(yi, 1, function(x) { return(basename(x[1]))})
yi.container <- c()
expression.list <- list()
for(i in 1 : length(ts)) {
expression.list[[ts[i]]] <- read.table(getFile(root.dir, expression.files[i]), header = T, sep = ' ')
}
intermediate.list <- list()
for(i in 1 : length(ps)) {
thisfile <- basename(intermediate.files[i])
yi.container <- rbind(yi.container, c(ps[i], yi[yi.filenames == thisfile, 2:7]))
intermediate.list[[ps[i]]] <- read.table(getFile(root.dir, intermediate.files[i]), header = T, sep = ' ')
intermediate.list[[ps[i]]]$PHENO <- intermediate.list[[ps[i]]]$PHENO - 1
}
yi.container <- data.frame(yi.container)
colnames(yi.container) <- c('causal.fraction', cols[2:7])
pander(gene, knitr.auto.asis = T)
| RAB38 |
ENSG00000123892.7 |
| UHRF1 |
ENSG00000034063.9 |
| UHRF1BP1 |
ENSG00000065060.12 |
| UHRF1BP1L |
ENSG00000111647.8 |
| ANKS1A |
ENSG00000064999.12 |
| HRH1 |
ENSG00000196639.6 |
| LGR5 |
ENSG00000139292.8 |
| TSPAN8 |
ENSG00000127324.4 |
Logistic regression with interaction term \(Y \sim E + I + I*E\)
yei.container <- c()
for (i in 1:length(intermediate.list)) {
cat("##", ps[i])
cat("\n\n")
inter <- intermediate.list[[ps[i]]]
temp.container <- c()
for (t in names(expression.list)) {
expre <- expression.list[[t]]
df <- inner_join(inter, expre, by = "IID")
for (g in gene[, 2]) {
if (sum(abs(expre[, g])) != 0) {
this.expre <- scale(expre[, g])
} else {
this.expre <- expre[, g]
}
data <- data.frame(y = inter$PHENO, i = inter$PRS,
e = this.expre)
model <- glm(y ~ e + i, family = binomial(link = "logit"),
data = data)
stats <- summary(model)
temp.container <- rbind(temp.container, c(ps[i],
t, g, stats$coefficients[2], stats$coefficients[5],
stats$coefficients[11], "Y ~ E + I", "E"))
temp.container <- rbind(temp.container, c(ps[i],
t, g, stats$coefficients[3], stats$coefficients[6],
stats$coefficients[12], "Y ~ E + I", "I"))
model2 <- glm(y ~ e + i + e * i, family = binomial(link = "logit"),
data = data)
stats2 <- summary(model2)
model3 <- lm(i ~ e, data = data)
stats3 <- summary(model3)
temp.container <- rbind(temp.container, c(ps[i],
t, g, stats2$coefficients[2], stats2$coefficients[6],
stats2$coefficients[14], "Y ~ E + I + E*I", "E"))
temp.container <- rbind(temp.container, c(ps[i],
t, g, stats2$coefficients[3], stats2$coefficients[7],
stats2$coefficients[15], "Y ~ E + I + E*I", "I"))
temp.container <- rbind(temp.container, c(ps[i],
t, g, stats2$coefficients[4], stats2$coefficients[8],
stats2$coefficients[16], "Y ~ E + I + E*I", "I*E"))
temp.container <- rbind(temp.container, c(ps[i],
t, g, stats3$coefficients[2], stats3$coefficients[4],
stats3$coefficients[8], "I ~ E", "E (on I)"))
}
}
temp.container <- data.frame(temp.container)
colnames(temp.container) <- c("causal.fraction", "tissue",
"gene", "estmate", "std", "pval", "type", "term")
temp.container <- left_join(temp.container, gene, by = c(gene = "gencode.id"))
temp.container <- cleanDF(temp.container, c("estmate", "std",
"pval"))
plot.e <- temp.container[temp.container$term == "E", ]
ncol <- length(unique(plot.e[!is.na(plot.e$pval), "gene.name"]))/3
if (ncol == floor(ncol)) {
ncol <- floor(ncol)
} else {
ncol <- floor(ncol) + 1
}
dodge <- position_dodge(width = 0.5)
p2 <- ggplot(plot.e[!is.na(plot.e$pval), ], aes(x = tissue,
color = type)) + geom_point(aes(y = estmate), position = dodge) +
geom_errorbar(aes(ymin = estmate - 1.96 * std, ymax = estmate +
1.96 * std), width = 0.1, position = dodge) + geom_hline(yintercept = 0,
color = "black") + coord_flip() + theme(axis.text.x = element_text(angle = 90,
hjust = 1)) + geom_text(aes(y = estmate, label = paste0("pval = ",
formatC(pval, digits = 2, format = "e")), vjust = -0.3,
hjust = 0.3), size = 3, position = dodge) + ggtitle("Marginal effect of E on Y") +
ylab("Regression coefficient") + xlab("Expression Tissue") +
facet_wrap(~gene.name, scales = "free_x", ncol = 3)
subchunkify(p2, fig_asp = ncol)
cat("\n\n")
plot.i <- temp.container[temp.container$term == "I", ]
ncol <- length(unique(plot.i[!is.na(plot.i$pval), "gene.name"]))/3
if (ncol == floor(ncol)) {
ncol <- floor(ncol)
} else {
ncol <- floor(ncol) + 1
}
dodge <- position_dodge(width = 0.5)
p2 <- ggplot(plot.i[!is.na(plot.i$pval), ], aes(x = tissue,
color = type)) + geom_point(aes(y = estmate), position = dodge) +
geom_errorbar(aes(ymin = estmate - 1.96 * std, ymax = estmate +
1.96 * std), width = 0.1, position = dodge) + geom_hline(yintercept = 0,
color = "black") + coord_flip() + theme(axis.text.x = element_text(angle = 90,
hjust = 1)) + geom_text(aes(y = estmate, label = paste0("pval = ",
formatC(pval, digits = 2, format = "e")), vjust = -0.3,
hjust = 0.3), size = 3, position = dodge) + ggtitle("Marginal effect of I on Y") +
ylab("Regression coefficient") + xlab("Expression Tissue") +
facet_wrap(~gene.name, scales = "free_x", ncol = 3)
subchunkify(p2, fig_asp = ncol)
cat("\n\n")
temp.container.nona <- temp.container[!is.na(temp.container$pval),
]
for (g in unique(temp.container.nona$gene.name)) {
cat("###", g, "\n", "\n")
plot.model2 <- temp.container.nona[temp.container.nona$type !=
"Y ~ E + I" & temp.container.nona$gene.name == g,
]
if (nrow(plot.model2) == 0)
next
ratio <- length(unique(plot.model2[!is.na(plot.model2$pval),
"tissue"]))/12
ratio <- max(0.7, ratio)
p2 <- ggplot(plot.model2[!is.na(plot.model2$pval), ],
aes(x = tissue)) + geom_point(aes(y = estmate)) +
geom_errorbar(aes(ymin = estmate - 1.96 * std, ymax = estmate +
1.96 * std), width = 0.1) + geom_hline(yintercept = 0,
color = "black") + coord_flip() + theme(axis.text.x = element_text(angle = 90,
hjust = 1)) + geom_text(aes(y = estmate, label = paste0("pval = ",
formatC(pval, digits = 2, format = "e")), vjust = -0.7,
hjust = 0.5), size = 3, position = dodge) + ggtitle("Logistic regression with interaction") +
ylab("Regression coefficient") + xlab("Expression Tissue") +
facet_wrap(~term, scales = "free_x", ncol = 4)
subchunkify(p2, fig_asp = ratio)
cat("\n\n")
}
cat("\n\n")
}
0.001


UHRF1BP1

RAB38

UHRF1BP1L

ANKS1A

HRH1

LGR5

TSPAN8

0.0003


UHRF1BP1

RAB38

UHRF1BP1L

ANKS1A

HRH1

LGR5

TSPAN8

0.0001


UHRF1BP1

RAB38

UHRF1BP1L

ANKS1A

HRH1

LGR5

TSPAN8
